/* Session setup: close any log left open, clear memory, set the maximum number of variables to 10000, and turn off paging and pause */
capture log close
clear all
set maxvar 10000
set more off
pause off
/* From this line on, every command must be terminated by a semicolon */
# delimit;

/* Root paths used to build file names below (all are empty strings in this copy of the file). */
/* Only crime and credit are referenced in the code visible here.                             */
local log "";
local crime "";
local credit "";
local data "";


/* 
Main Prep 1
*/

/* *************************************** */
/* **** PREP ORIGINAL FILES FOR MERGE **** */
/* *************************************** */
/* Filter expression listing the states retained for the merge (va is included and handled separately later) */
local state_list 
		state=="ar" | state=="az" | state=="ca" | state=="co" | state=="ct" | state=="fl" | 
		state=="il" | state=="in" | state=="ks" | state=="md" | state=="mi" | state=="mn" | 
		state=="ms" | state=="nc" | state=="nd" | state=="ne" | state=="nj" | state=="oh" | 
		state=="or" | state=="pa" | state=="tx" | state=="va" | state=="wa" | state=="wi" ;
/* State loops for the CJARS identifier files. Neither list contains va.      */
/* The no1 list omits ar because the ar file is opened first and the remaining */
/* states are appended to it.                                                  */
local foreach_st 		"ar az ca co ct fl ks il in md mi mn ms nc nd ne nj oh or pa tx wa wi" ; 
local foreach_st_no1 	"   az ca co ct fl ks il in md mi mn ms nc nd ne nj oh or pa tx wa wi" ; 

use 	 "`crime'\prwora_age18_crimekid.dta", clear;

/* These variables may or may not be present, hence capture */
capture drop combpmt_2004* combpmt_2005* ;

/* Build a lower-case state abbreviation from the first two characters of pdscc via the state code crosswalk */
gen state_ssa = substr(pdscc,1,2) ; 
destring state_ssa, replace ;
merge m:1 state_ssa using "`credit'\state_codes.dta", keepusing(state_abbrev) ;
drop if _merge == 2 ;
drop _merge ;		
gen state=lower(state_abbrev);
drop state_abbrev state_ssa ;

/* Keep only states in CJARS data */
keep if `state_list' ;

/* Row identifier of the prepared file. Stored as long because the default    */
/* float type cannot represent row numbers above 16,777,216 exactly, which    */
/* would make ids collide in a large file.                                    */
gen long idu = _n;

rename fssr_lastn last_name;
rename fssr_firstn first_name;
/* A lone dash is treated as no middle name. Only the middle initial is kept. */
replace fssr_middlen="" if fssr_middlen=="-";
gen middle_name = substr(fssr_middlen, 1, 1);

/* Month and day of birth, used as the date key for VA */
gen dob_mm = month(dob) ;
gen dob_dd = day(dob) ;

/* Records without a last name, first name or date of birth cannot be matched */
drop if last_name=="";
drop if first_name=="";
drop if dob==. ;

/* Remove the listed punctuation characters and blanks from the three name fields */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
			   "{" "}" "*" "[" "]" "`" "'" " " {;
qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};

save "`crime'\831\crime_prwora_formerge_prep.dta", replace;


/* **************************************************************** */
/* **** MERGE PROCESS **** */
/* **************************************************************** */	

/* Match keys for the exact-merge passes */
/* Part1: 1st best match (middle name is part of the key) */
local required_var1 "st last_name first_name middle_name dob";
/* Part2: 2nd best match (no middle name) */
local required_var2 "st last_name first_name dob";
/* Part1 and Part2 for VA: VA only has month and day of birth */
local required_var1va "st last_name first_name middle_name dob_mm dob_dd" ;
local required_var2va "st last_name first_name dob_mm dob_dd" ;


/* ***************************************************************** */
/* ************* MERGE ALL STATES EXCEPT FOR VA ******************** */
/* ***************************************************************** */

/* ******************************** */
/* STEP 1: "USING File" Preparation */	
/* ******************************** */

/* ---------------------------------------------------------------------- */
/* PART 1 key: st last_name first_name middle_name dob */
/* Two files are produced for each part:                                         */
/*   restore file - every prepared record, tagged with the group id unique_idu   */
/*   unique file  - one row per distinct key, used as the using file of the merge */
use "`crime'\831\crime_prwora_formerge_prep.dta", clear;
drop if state == "va" ; 										/* **** DROP VA FROM DATA **** */
sort `required_var1';
/* missing option: records with a missing key component still receive a group id */
egen unique_idu = group(`required_var1'), missing; 
/* Renamed so this middle name can sit next to the CJARS middle_name after the restore merge */
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt1;
save "`using_file_for_restore_pt1'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var1' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var1';
tempfile using_file_unique_merge_pt1;
/* Path is quoted, as in the other saves, so a temp directory containing blanks does not break the command */
save "`using_file_unique_merge_pt1'", replace;

/* ---------------------------------------------------------------------- */
/* PART 2 key: st last_name first_name dob */
use "`crime'\831\crime_prwora_formerge_prep.dta", clear;
drop if state == "va" ; 										/* **** DROP VA FROM DATA **** */
sort `required_var2';
egen unique_idu = group(`required_var2'), missing;
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt2;
save "`using_file_for_restore_pt2'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var2' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var2';
tempfile using_file_unique_merge_pt2;
save "`using_file_unique_merge_pt2'", replace;


/* **************************************** */
/* STEP 2: Execute - Identify Exact Matches */	
/* **************************************** */

/* #### PART 1 START #### */
/* Exact match of CJARS records that carry a middle initial, keyed on the Part 1 variables */

/* Stack the CJARS identifier files: ar first, then every state in the no1 list */
use "`crime'\merge\cjars_identifier_ar.dta", clear;
foreach st in 	`foreach_st_no1' { ;
	append using "`crime'\merge\cjars_identifier_`st'.dta" ;
} ;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row identifier of the stacked CJARS file. Stored as long because the default */
/* float type cannot represent row numbers above 16,777,216 exactly, which     */
/* would make ids collide in a large stacked file.                             */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the listed punctuation characters and blanks from the three name fields */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

/* Compare on the middle initial only */
replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 1 uses only records that have a middle initial */
keep if middle_name != "";
/* ******************** */

/* Attach the group id of the matching key and keep matched records only */
sort `required_var1';
merge `required_var1' using "`using_file_unique_merge_pt1'", nokeep;
keep if _merge == 3;
drop _merge;

local perfectn1 = c(N);
noi di as result _n "`perfectn1' perfect matches found in Part 1" _n;

/* Bring back the full prepared records that share the matched key.           */
/* NOTE(review): unique_idu can repeat on both sides here. This merge syntax  */
/* pairs such records in sequence within a group instead of forming every     */
/* combination - confirm that is intended (joinby would form all pairs).      */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt1'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

gen byte part=1;
tempfile prwora_merge_pt1 ;
/* Path is quoted so a temp directory containing blanks does not break the command */
save "`prwora_merge_pt1'" ;
count;

/* #### PART 1 END #### */
/* ---------------------------------------------------------------------- */
/* #### PART 2 START #### */
/* Exact match of CJARS records without a middle initial, keyed on the Part 2 variables */

/* Stack the CJARS identifier files: ar first, then every state in the no1 list */
use "`crime'\merge\cjars_identifier_ar.dta", clear;
foreach st in 	`foreach_st_no1' { ;
	append using "`crime'\merge\cjars_identifier_`st'.dta" ;
} ;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row identifier of the stacked CJARS file. Stored as long because the default */
/* float type cannot represent row numbers above 16,777,216 exactly, which     */
/* would make ids collide in a large stacked file.                             */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the listed punctuation characters and blanks from the three name fields */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 2 uses only records that have no middle initial */
keep if middle_name == "";
/* ******************** */

/* Attach the group id of the matching key and keep matched records only */
sort `required_var2';
merge `required_var2' using "`using_file_unique_merge_pt2'", nokeep;
keep if _merge == 3;
drop _merge;

local perfectn2 = c(N);
noi di as result _n "`perfectn2' perfect matches found in Part 2" _n;

/* Bring back the full prepared records that share the matched key.           */
/* NOTE(review): unique_idu can repeat on both sides here. This merge syntax  */
/* pairs such records in sequence within a group instead of forming every     */
/* combination - confirm that is intended (joinby would form all pairs).      */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt2'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

gen byte part=2;
tempfile prwora_merge_pt2 ;
/* Path is quoted so a temp directory containing blanks does not break the command */
save "`prwora_merge_pt2'" ;








/* ***************************************************************** */
/* ************************ MERGE VA SEPARATELY ******************** */
/* ***************************************************************** */

/* ******************************** */
/* STEP 1: "USING File" Preparation */	
/* ******************************** */

/* ---------------------------------------------------------------------- */
/* PART 1 key for VA: st last_name first_name middle_name dob_mm dob_dd */
/* Two files are produced for each part:                                         */
/*   restore file - every prepared VA record, tagged with the group id unique_idu */
/*   unique file  - one row per distinct key, used as the using file of the merge */
use "`crime'\831\crime_prwora_formerge_prep.dta", clear;
keep if state == "va" ; 										/* **** keep only VA! **** */
sort `required_var1va';
/* missing option: records with a missing key component still receive a group id */
egen unique_idu = group(`required_var1va'), missing; 
/* Renamed so this middle name can sit next to the CJARS middle_name after the restore merge */
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt1va;
save "`using_file_for_restore_pt1va'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var1va' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var1va';
tempfile using_file_unique_merge_pt1va;
/* Path is quoted, as in the other saves, so a temp directory containing blanks does not break the command */
save "`using_file_unique_merge_pt1va'", replace;

/* ---------------------------------------------------------------------- */
/* PART 2 key for VA: st last_name first_name dob_mm dob_dd */
use "`crime'\831\crime_prwora_formerge_prep.dta", clear;
keep if state == "va" ; 										/* **** keep only VA! **** */
sort `required_var2va';
egen unique_idu = group(`required_var2va'), missing;
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt2va;
save "`using_file_for_restore_pt2va'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var2va' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var2va';
tempfile using_file_unique_merge_pt2va;
save "`using_file_unique_merge_pt2va'", replace;


/* **************************************** */
/* STEP 2: Execute - Identify Exact Matches */	
/* **************************************** */

/* #### PART 1 START #### */
/* Exact match of VA CJARS records that carry a middle initial, keyed on the VA Part 1 variables */
use "`crime'\merge\cjars_identifier_va.dta", clear;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row identifier of the VA CJARS file. Stored as long because the default float */
/* type cannot represent row numbers above 16,777,216 exactly.                   */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the listed punctuation characters and blanks from the three name fields */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

/* Compare on the middle initial only */
replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 1 uses only records that have a middle initial */
keep if middle_name != "";
/* ******************** */

/* Attach the group id of the matching key and keep matched records only */
sort `required_var1va';
merge `required_var1va' using "`using_file_unique_merge_pt1va'", nokeep;
keep if _merge == 3;
drop _merge;

/* This reuses the local name of the non-VA Part 1 count, which is overwritten here */
local perfectn1 = c(N);
noi di as result _n "`perfectn1' perfect matches found in Part 1" _n;

/* Bring back the full prepared records that share the matched key.           */
/* NOTE(review): unique_idu can repeat on both sides here. This merge syntax  */
/* pairs such records in sequence within a group instead of forming every     */
/* combination - confirm that is intended (joinby would form all pairs).      */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt1va'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

gen byte part=1;
tempfile prwora_merge_pt1va ;
/* Path is quoted so a temp directory containing blanks does not break the command */
save "`prwora_merge_pt1va'" ;
count;

/* #### PART 1 END #### */
/* ---------------------------------------------------------------------- */
/* #### PART 2 START #### */
/* Exact match of VA CJARS records without a middle initial, keyed on the VA Part 2 variables */
use "`crime'\merge\cjars_identifier_va.dta", clear;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row identifier of the VA CJARS file. Stored as long because the default float */
/* type cannot represent row numbers above 16,777,216 exactly.                   */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the listed punctuation characters and blanks from the three name fields */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 2 uses only records that have no middle initial */
keep if middle_name == "";
/* ******************** */

/* Attach the group id of the matching key and keep matched records only */
sort `required_var2va';
merge `required_var2va' using "`using_file_unique_merge_pt2va'", nokeep;
keep if _merge == 3;
drop _merge;

/* This reuses the local name of the non-VA Part 2 count, which is overwritten here */
local perfectn2 = c(N);
noi di as result _n "`perfectn2' perfect matches found in Part 2" _n;

/* Bring back the full prepared records that share the matched key.           */
/* NOTE(review): unique_idu can repeat on both sides here. This merge syntax  */
/* pairs such records in sequence within a group instead of forming every     */
/* combination - confirm that is intended (joinby would form all pairs).      */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt2va'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

gen byte part=2;
tempfile prwora_merge_pt2va ;
/* Path is quoted so a temp directory containing blanks does not break the command */
save "`prwora_merge_pt2va'" ;









/* Append all parts */
/* Load the VA Part 2 matches explicitly instead of relying on whatever is left in memory, */
/* then stack the other three match files on top. Tempfile paths are quoted so a temp      */
/* directory containing blanks does not break the commands.                                */
use "`prwora_merge_pt2va'", clear;
append using "`prwora_merge_pt1va'";
append using "`prwora_merge_pt1'";
append using "`prwora_merge_pt2'";

/* Sort and drop duplicate merges */
/* Within a cjars_id and hun pair the sort puts the lowest part first, and duplicates drop */
/* keeps the first record, so a Part 1 match is preferred over a Part 2 match.             */
sort cjars_id hun part ;
duplicates drop cjars_id hun, force ;

/* Merge stats */
noi tab part;

/* Exporting final merged file */
save "`crime'\prep1\crime_prwora_merged.dta", replace;


/* No log is opened in this file, and a plain log close stops with an error when no log is open */
capture log close;
